Authors: James Brugler Calebe De Roure Marta Khomyn Max Prakoso Tails Putnins
# Load requried packages and print versions
import sys
import pandas as pd
import numpy as np
import statsmodels.api as sm
lowess = sm.nonparametric.lowess
import datetime as dt
import multiprocessing as mp
import warnings
import matplotlib.pyplot as plt
import matplotlib
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
import matplotlib.lines as mlines
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
warnings.filterwarnings('ignore', category=RuntimeWarning)
print('Python version: ', sys.version) # dt and mp are part of standard python library
print('Pandas version: ', pd.__version__)
print('Numpy version: ', np.__version__)
print('Statsmodels version: ', sm.__version__)
pathdata = 'Data/'
Python version: 3.9.12 (main, Apr 4 2022, 05:22:27) [MSC v.1916 64 bit (AMD64)] Pandas version: 1.5.2 Numpy version: 1.21.5 Statsmodels version: 0.13.2
# Load the cash-rate data
dfcash = pd.read_excel(pathdata + 'f01d.xlsx',
skiprows = 10)
dfcash.columns = [x.lower() for x in dfcash.columns]
dictcolsrba = {'series id' : 'date',
'firmmcrtd' : 'onr',
'firmmcrid' : 'aonia'}
dfcash = dfcash[dictcolsrba.keys()]
dfcash.columns = [dictcolsrba[x] for x in dictcolsrba.keys()]
# Load the ASX data
dfasx = pd.read_excel(pathdata + 'sofia-beta-version_april_2024_corected.xlsx',
skiprows = 4,
skipfooter = 10)
dfasx.columns = [x.lower() for x in dfasx.columns]
dictcolsasx = {'date' : 'date',
'sofia overnight (%) vwap' : 'sofia_vwap_asx',
'sofia overnight (%) volume weighted median' : 'sofia_median_asx'}
dfasx = dfasx[dictcolsasx.keys()]
dfasx.columns = [dictcolsasx[x] for x in dictcolsasx.keys()]
# Make a single dataframe with both refrates
dffin = pd.merge(dfcash,
dfasx,
on = 'date',
how = 'inner')
# Load the new RBA data
dfrba = pd.read_excel(pathdata + 'allSOFIAslonger.xlsx')
dfrba.columns = [x.lower().replace('sofia_','sofia_vwap_').replace('sofiamedian','sofia_median') for x in dfrba.columns]
rbavwapcols = [x for x in dfrba.columns if x.find('vwap')>-1]
rbamediancols = [x for x in dfrba.columns if x.find('median')>-1]
rbacols = ['date']
rbacols.extend(rbavwapcols[:])
rbacols.extend(rbamediancols)
dffin = pd.merge(dffin,
dfrba[rbacols],
on = 'date',
how = 'left')
# Load RBA-replicated SOFIA and check it all lines up
dfsofiarep = pd.read_excel(pathdata + 'sofia_replicate.xlsx')
dfsofiarep.columns = [x.lower() for x in dfsofiarep.columns]
dictcolsrba = {'sofiamedian' : 'sofia_median_rbachck',
'sofia' : 'sofia_vwap_rbachck'}
dfsofiarep.rename(columns = dictcolsrba, inplace = True)
dffin = pd.merge(dffin,
dfsofiarep[['date','sofia_vwap_rbachck','sofia_median_rbachck']],
on = 'date',
how = 'left')
plt.scatter(dffin.sofia_vwap_asx, dffin.sofia_vwap_rba)
<matplotlib.collections.PathCollection at 0x1fc6a22fca0>
# Examine first five and last five rows
display(dffin.head())
display(dffin.tail())
| date | onr | aonia | sofia_vwap_asx | sofia_median_asx | sofia_vwap_rba | sofia_vwap_0000 | sofia_vwap_0040 | sofia_vwap_2525 | sofia_vwap_2525_2mn | sofia_vwap_0525 | sofia_vwap_0525_2mn | sofia_vwap_relparty | sofia_median_rba | sofia_median_0000 | sofia_median_0040 | sofia_median_2525 | sofia_median_2525_2mn | sofia_median_0525 | sofia_median_0525_2mn | sofia_median_relparty | sofia_vwap_rbachck | sofia_median_rbachck | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-01-04 | 0.1 | 0.04 | 0.015187 | 0.010 | 0.015187 | -0.005466 | 0.016484 | 0.01 | 0.01 | 0.012037 | 0.012049 | 0.015187 | 0.01 | 0.01 | 0.01 | 0.01 | 0.01 | 0.01 | 0.01 | 0.01 | 0.015187 | 0.01 |
| 1 | 2022-01-05 | 0.1 | 0.04 | 0.016911 | 0.020 | 0.016911 | 0.003159 | 0.018639 | 0.01 | 0.01 | 0.014611 | 0.014627 | 0.016680 | 0.02 | 0.01 | 0.02 | 0.01 | 0.01 | 0.01 | 0.01 | 0.02 | 0.016911 | 0.02 |
| 2 | 2022-01-06 | 0.1 | 0.04 | 0.017234 | 0.010 | 0.017234 | -0.020012 | 0.019043 | 0.01 | 0.01 | 0.013633 | 0.013633 | 0.015464 | 0.01 | 0.01 | 0.02 | 0.01 | 0.01 | 0.01 | 0.01 | 0.01 | 0.017234 | 0.01 |
| 3 | 2022-01-07 | 0.1 | 0.04 | 0.017647 | 0.020 | 0.017647 | -0.009158 | 0.019559 | 0.01 | 0.01 | 0.013813 | 0.013817 | 0.016611 | 0.02 | 0.01 | 0.02 | 0.01 | 0.01 | 0.01 | 0.01 | 0.01 | 0.017647 | 0.02 |
| 4 | 2022-01-10 | 0.1 | 0.04 | 0.017760 | 0.015 | 0.017794 | -0.004272 | 0.019743 | 0.01 | 0.01 | 0.015361 | 0.015368 | 0.017710 | 0.01 | 0.01 | 0.02 | 0.01 | 0.01 | 0.01 | 0.01 | 0.01 | 0.017794 | 0.01 |
| date | onr | aonia | sofia_vwap_asx | sofia_median_asx | sofia_vwap_rba | sofia_vwap_0000 | sofia_vwap_0040 | sofia_vwap_2525 | sofia_vwap_2525_2mn | sofia_vwap_0525 | sofia_vwap_0525_2mn | sofia_vwap_relparty | sofia_median_rba | sofia_median_0000 | sofia_median_0040 | sofia_median_2525 | sofia_median_2525_2mn | sofia_median_0525 | sofia_median_0525_2mn | sofia_median_relparty | sofia_vwap_rbachck | sofia_median_rbachck | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 802 | 2025-03-13 | 4.1 | 4.09 | 4.134282 | 4.13 | 4.134282 | 4.109091 | 4.136029 | 4.129060 | 4.129064 | 4.129060 | 4.129064 | 4.135223 | 4.13 | 4.13 | 4.14 | 4.13 | 4.13 | 4.13 | 4.13 | 4.13 | NaN | NaN |
| 803 | 2025-03-14 | 4.1 | 4.09 | 4.135620 | 4.14 | 4.135595 | 4.120111 | 4.136994 | 4.130000 | 4.130000 | 4.130000 | 4.130000 | 4.135913 | 4.14 | 4.13 | 4.14 | 4.13 | 4.13 | 4.13 | 4.13 | 4.14 | NaN | NaN |
| 804 | 2025-03-17 | 4.1 | 4.09 | 4.136397 | 4.14 | 4.136411 | 4.127051 | 4.138014 | 4.130000 | 4.130000 | 4.130000 | 4.130000 | 4.136716 | 4.14 | 4.13 | 4.14 | 4.13 | 4.13 | 4.13 | 4.13 | 4.14 | NaN | NaN |
| 805 | 2025-03-18 | 4.1 | 4.09 | 4.135447 | 4.14 | 4.135447 | 4.127629 | 4.136809 | 4.130000 | 4.130000 | 4.130000 | 4.130000 | 4.135597 | 4.14 | 4.13 | 4.14 | 4.13 | 4.13 | 4.13 | 4.13 | 4.14 | NaN | NaN |
| 806 | 2025-03-19 | 4.1 | 4.09 | 4.134332 | 4.13 | 4.134332 | 4.119904 | 4.135703 | 4.129619 | 4.129626 | 4.129619 | 4.129626 | 4.134758 | 4.13 | 4.13 | 4.13 | 4.13 | 4.13 | 4.13 | 4.13 | 4.13 | NaN | NaN |
# Examine the sum-stats for the data
display(dffin.describe().T)
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| onr | 807.0 | 3.252416 | 1.484464 | 0.100000 | 2.600000 | 4.100000 | 4.350000 | 4.350000 |
| aonia | 807.0 | 3.223234 | 1.493535 | 0.040000 | 2.560000 | 4.070000 | 4.320000 | 4.340000 |
| sofia_vwap_asx | 807.0 | 3.230842 | 1.515500 | 0.006326 | 2.544225 | 4.092313 | 4.339667 | 4.404321 |
| sofia_median_asx | 807.0 | 3.229641 | 1.516166 | 0.010000 | 2.550000 | 4.100000 | 4.340000 | 4.400000 |
| sofia_vwap_rba | 803.0 | 3.231961 | 1.516501 | 0.006492 | 2.545580 | 4.092760 | 4.339666 | 4.404321 |
| sofia_vwap_0000 | 803.0 | 3.208980 | 1.521034 | -0.061389 | 2.524428 | 4.070573 | 4.325986 | 4.393291 |
| sofia_vwap_0040 | 803.0 | 3.237651 | 1.516800 | 0.014591 | 2.552440 | 4.099065 | 4.343170 | 4.410237 |
| sofia_vwap_2525 | 796.0 | 3.208022 | 1.519735 | -0.005351 | 2.523751 | 4.074365 | 4.328685 | 4.384996 |
| sofia_vwap_2525_2mn | 796.0 | 3.208065 | 1.519744 | -0.004947 | 2.523772 | 4.074373 | 4.328689 | 4.384994 |
| sofia_vwap_0525 | 800.0 | 3.221637 | 1.518471 | -0.002261 | 2.536070 | 4.086702 | 4.334538 | 4.393798 |
| sofia_vwap_0525_2mn | 800.0 | 3.221657 | 1.518460 | -0.001895 | 2.537901 | 4.086702 | 4.334543 | 4.393807 |
| sofia_vwap_relparty | 799.0 | 3.228171 | 1.520115 | 0.001329 | 2.544416 | 4.094379 | 4.340921 | 4.406417 |
| sofia_median_rba | 803.0 | 3.230741 | 1.517160 | 0.010000 | 2.550000 | 4.100000 | 4.340000 | 4.400000 |
| sofia_median_0000 | 803.0 | 3.222522 | 1.517355 | 0.010000 | 2.530000 | 4.080000 | 4.330000 | 4.400000 |
| sofia_median_0040 | 803.0 | 3.235405 | 1.516847 | 0.010000 | 2.550000 | 4.100000 | 4.340000 | 4.400000 |
| sofia_median_2525 | 796.0 | 3.209334 | 1.520262 | 0.000000 | 2.530000 | 4.070000 | 4.330000 | 4.390000 |
| sofia_median_2525_2mn | 796.0 | 3.209410 | 1.520306 | 0.000000 | 2.530000 | 4.075000 | 4.330000 | 4.390000 |
| sofia_median_0525 | 800.0 | 3.222675 | 1.519564 | 0.010000 | 2.550000 | 4.090000 | 4.340000 | 4.400000 |
| sofia_median_0525_2mn | 800.0 | 3.222713 | 1.519562 | 0.010000 | 2.550000 | 4.090000 | 4.340000 | 4.400000 |
| sofia_median_relparty | 799.0 | 3.227372 | 1.520153 | 0.010000 | 2.550000 | 4.100000 | 4.340000 | 4.400000 |
| sofia_vwap_rbachck | 761.0 | 3.175133 | 1.537583 | 0.006492 | 2.527913 | 4.089460 | 4.339163 | 4.404321 |
| sofia_median_rbachck | 761.0 | 3.173922 | 1.538279 | 0.010000 | 2.530000 | 4.090000 | 4.340000 | 4.400000 |
# Dictionaries for labelling plots and lists to analyze
dictlabels = {'sofia_vwap_asx' : 'ASX',
'sofia_vwap_rba' : 'RBA Rep.',
}
# Choose which rates to estimate
refextract_all = ['sofia_vwap_asx',
'sofia_vwap_rba']
# Decide if to round
roundflag, roundsig = 1, 4
# Create data for MP state-space estimation
"""
For each reference rate, create a Tx1 vector of observed rates and estimate the
univariate local level state-space model to the state variable and noise term for each
observation
"""
dictdata, dictcntrl, dictdf = {}, {}, {}
estlistall = []
for refextract in refextract_all:
T = len(dffin)
y = np.empty((T,1))
if roundflag == 0:
y[:,0] = dffin[refextract]
elif roundflag == 1:
y[:,0] = np.round(dffin[refextract], decimals = 2)
x = dffin[['onr']].values.reshape((T,1))
dictdata[refextract] = y
dictcntrl[refextract] = x
dictdf[refextract] = dffin.copy()
x = dictcntrl[refextract]
xonr = x[:,-1]
onrvar = x[:,-1].var()
y = dictdata[refextract]
datesccy = dictdf[refextract].date
estlistall.append(['full', refextract, y, xonr, onrvar, datesccy])
# Estimate each model in parallel using all but four cores
from stsp_ll_mods import stspestmp_ll
mp_cores = mp.cpu_count() - 4
pool = mp.Pool(mp_cores)
if __name__ == '__main__':
ssbyperccy = pool.map(stspestmp_ll, estlistall)
pool.close()
dfreginf = pd.concat(ssbyperccy)
# Plots and save
for refextract in refextract_all:
dfres = dfreginf[dfreginf.alt == refextract].copy()
dfplot = dfres.dfsignoise[0]
# print(dfres['alt'].iloc[0])
# display(dfplot.describe())
fig = plt.figure(figsize = (7, 3))
ax1 = fig.add_subplot(1,1,1)
l1 = ax1.plot(dfplot.datefmt,
dfplot.state_smooth, 'b-',
lw= 1, markersize = 3)
yearsFmt = DateFormatter('%b-%Y')
years = YearLocator()
months = MonthLocator()
ax1.autoscale_view()
ax1.xaxis.set_tick_params(labelbottom=True,
rotation = 90)
ax1.xaxis.set_major_formatter(yearsFmt)
ax1.xaxis.set_minor_locator(months)
ax1.grid(True, linestyle = 'dashed')
ax1.set_ylabel('Yield (%)')
aonia_leg= mlines.Line2D([], [], color='blue', lw = 1,
linestyle = '-', label = 'Smoothed state ' + dictlabels[refextract])
plt.legend(handles=[aonia_leg])
print(dfres.ressum[0])
dfplot.to_csv('Outputs/' + 'sign_noise_' + refextract + '_apr2025.csv',
index = False)
Statespace Model Results
==============================================================================
Dep. Variable: y No. Observations: 807
Model: llmod_wCntrls Log Likelihood 2565.672
Date: Tue, 17 Feb 2026 AIC -5123.343
Time: 14:43:20 BIC -5104.570
Sample: 0 HQIC -5116.134
- 807
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
state_var 8.931e-06 5.2e-07 17.170 0.000 7.91e-06 9.95e-06
obs_var 7.036e-05 1.29e-06 54.530 0.000 6.78e-05 7.29e-05
betavar0_1 1.0028 0.006 174.801 0.000 0.992 1.014
===================================================================================
Ljung-Box (L1) (Q): 12.65 Jarque-Bera (JB): 42447.56
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 1.06 Skew: -0.16
Prob(H) (two-sided): 0.66 Kurtosis: 38.55
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
Statespace Model Results
==============================================================================
Dep. Variable: y No. Observations: 807
Model: llmod_wCntrls Log Likelihood 2540.345
Date: Tue, 17 Feb 2026 AIC -5072.690
Time: 14:43:20 BIC -5053.916
Sample: 0 HQIC -5065.481
- 807
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
state_var 9.77e-06 5.15e-07 18.985 0.000 8.76e-06 1.08e-05
obs_var 7.162e-05 1.4e-06 51.040 0.000 6.89e-05 7.44e-05
betavar0_1 1.0014 0.006 169.629 0.000 0.990 1.013
===================================================================================
Ljung-Box (L1) (Q): 11.13 Jarque-Bera (JB): 42362.65
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 0.97 Skew: -0.03
Prob(H) (two-sided): 0.81 Kurtosis: 38.52
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).